* Analysis, PSID data

clear
set more off
set seed 1234

loc date: display %td_CCYY_NN_DD date(c(current_date), "DMY")
loc date_string = subinstr(trim("`date'"), " " , "-", .)
gl date `date_string' 	

* Merging families

use "${path}/Data/sons_data_clean.dta" , clear

merge m:1 son_id using "${path}/Data/dads_data_clean_collapse.dta"
drop if _merge==2
foreach var of varlist SEXDAD-_merge {
	ren `var' DAD_`var'
}
merge m:1 son_id using "${path}/Data/moms_data_clean_collapse.dta"
drop if _merge==2
foreach var of varlist SEXDAD-_merge {
	ren `var' MOM_`var'
}

* Restrictions
keep if FSONAGE>=25 & FSONAGE<=48 
keep if ER>=1 & ER<=2930 

* Son's income
ren LRFMONEYSONCLEAN INC
ren EDUSON EDU

* Generate parental income and age measure if son-parent household matches
gen 	HH_PINC15TO17 = DAD_PINC15TO17 if DAD_samehh_s15==1
replace HH_PINC15TO17 = DAD_PINC15TO17 if DAD_samehh_s16==1 & HH_PINC15TO17==.
replace HH_PINC15TO17 = DAD_PINC15TO17 if DAD_samehh_s17==1 & HH_PINC15TO17==.
gen     HH_DAD = 1 if HH_PINC15TO17!=. 
replace HH_DAD = 0 if HH_PINC15TO17==. 
replace HH_PINC15TO17 = MOM_PINC15TO17 if MOM_samehh_s15==1 & HH_PINC15TO17==.
replace HH_PINC15TO17 = MOM_PINC15TO17 if MOM_samehh_s16==1 & HH_PINC15TO17==.
replace HH_PINC15TO17 = MOM_PINC15TO17 if MOM_samehh_s17==1 & HH_PINC15TO17==.
replace HH_DAD = . if HH_PINC15TO17==.
ren HH_PINC15TO17 PINC

* Fill other parental variables (depending on whether dad or mom was matched)
gen COHORTPARENT=DAD_COHORTPARENT if HH_DAD==1.   
replace COHORTPARENT=MOM_COHORTPARENT if HH_DAD==0

gen AGEP=DAD_PARENTAGE15TO17 if HH_DAD==1
replace AGEP=MOM_PARENTAGE15TO17 if HH_DAD==0
gen AGEP2=AGEP^2

gen PEDU=DAD_EDUDAD  if HH_DAD==1
replace PEDU=MOM_EDUDAD  if HH_DAD==0
replace PEDU=DAD_EDUDAD if PEDU==.
replace PEDU=MOM_EDUDAD if PEDU==.

* Generate other variables
replace year=year+1900
gen yeard=year-80
gen FSONAGEd40=FSONAGE-40
gen FSONAGEd35=FSONAGE-35
gen FSONAGEd33=FSONAGE-33

gen DECADESON=10*floor(COHORTSON/10)
replace DECADESON=. if DECADESON<1950 | DECADESON>=1990
tab DECADESON 			
gen POST1960=(COHORTSON>=1960 & COHORTSON<.)
gen DECADE=10*floor(year/10)

* Tag first observation for each child
bysort son_id (FSONAGE): gen first=(_n==1)

* Start log file
cap log close
log using "${path}/Log/JPE_trends_${date}.log" , replace

* IGE by cohort
* Version: Pool incomes in age 30-35, account for deviations of IGE from age 35

* We also 
* -pool sample across sons and daughters,
* -condition on parental age and parental age squared
* -and use balanced sample with non-missing education and income for parents and kids

keep if PEDU!=. & EDU!=. & INC!=. & PINC!=.
keep if FSONAGE>=30 & FSONAGE<=35

est drop _all
forval d=1950(10)1980 {
reghdfe INC PINC c.FSONAGEd33#c.PINC AGEP AGEP2 if DECADESON==`d' , absorb(year FSONAGE) cluster(son_id)
eststoreSD
scalar ige_`d'=_b[PINC]
scalar n_`d'=e(N)
scalar ni_`d'=e(N_clust)
est store est`d'
}
estout est* using "${path}/Log/JPE_trends_${date}.scsv", title("Age 30-35, control for variation of IGE with age") ///
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust sdINC sdINC3y  sdPINC)  

* Correlation by cohort

est drop _all
forval d=1950(10)1980 {
regIGC INC PINC "c.FSONAGEd33#c.PINC AGEP AGEP2 if  DECADESON==`d' , absorb(year FSONAGE) cluster(son_id)"
est store est`d'
}
estout est* using "${path}/Log/IGC_trends_${date}.scsv", title("Age 30-35, control for variation of IGE with age") ///
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust)  

* Returns to schooling: Sons

est drop _all
forval d=1950(10)1980 {
reghdfe INC EDU c.FSONAGEd33#c.EDU AGEP AGEP2  if  DECADESON==`d' , absorb(year FSONAGE) cluster(son_id)
est store est`d'
}
estout est* using "${path}/Log/JPE_trends_${date}.scsv", title("Returns to schooling: Sons, age 30-35") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust sdINC sdINC3y  sdPINC)  
		

* Returns to schooling: Sons (structural)

forval d=1950(10)1980 {
reghdfe INC EDU c.FSONAGEd33#c.EDU PINC c.FSONAGEd33#c.PINC AGEP AGEP2 if  DECADESON==`d', absorb(year FSONAGE) cluster(son_id)
scalar rho_`d'=_b[EDU]
}

* Correlation income and schooling: Sons
est drop _all
forval d=1950(10)1980 {
regIGCedu "INC EDU c.FSONAGEd33#c.EDU AGEP AGEP2  if  DECADESON==`d' , absorb(year FSONAGE) cluster(son_id)"
est store est`d'
}
estout est* using "${path}/Log/JPE_trends_${date}.scsv", title("Correlation income and education: Sons, age 30-35") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust sdINC sdINC3y  sdPINC)  

* IGE

est drop _all
forval d=1950(10)1980 {
reghdfe INC PINC c.FSONAGEd33#c.PINC AGEP AGEP2 if DECADESON==`d' , absorb(year FSONAGE) cluster(son_id)
eststoreSD
est store est`d'
}
estout est* using "${path}/Log/IGC_trends_collapse_${date}.scsv", title("IGE") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) replace starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust)  

* IGE Education
est drop _all
forval d=1950(10)1980 {
reghdfe EDU PEDU c.FSONAGEd33#c.PEDU AGEP AGEP2 if DECADESON==`d' , absorb(FSONAGE year) cluster(son_id)
scalar lam_`d'=_b[PEDU]
eststoreEDUSD
est store est`d'
}
estout est* using "${path}/Log/IGC_trends_collapse_${date}.scsv", title("IG coefficient education") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust sdEDU sdPEDU)  
	
* IGC Education
est drop _all
forval d=1950(10)1980 {
regIGC "EDU" "PEDU" "c.FSONAGEd33#c.PEDU AGEP AGEP2 if DECADESON==`d' , absorb(FSONAGE year) cluster(son_id)"
est store est`d'
}
estout est* using "${path}/Log/IGC_trends_collapse_${date}.scsv", title("IG correlation education") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust)  

* Returns to schooling: Parents
est drop _all
forval d=1950(10)1980 {
reghdfe PINC PEDU AGEP AGEP2 if DECADESON==`d' , absorb(year) cluster(son_id)
est store est`d'
}
estout est* using "${path}/Log/IGC_trends_collapse_${date}.scsv", title("Returns to schooling: Parents") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust)  
	
* Correlation parental education and income
est drop _all
forval d=1950(10)1980 {
regIGC PINC PEDU "AGEP AGEP2 if DECADESON==`d' , absorb(year) cluster(son_id)"
est store est`d'
}
estout est* using "${path}/Log/IGC_trends_collapse_${date}.scsv", title("Correlation parental education and income") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust)  

* Covariance parental education and income
est drop _all
corr PINC PEDU if DECADESON==1950 , cov
corr PINC PEDU if DECADESON==1960 , cov
corr PINC PEDU if DECADESON==1970 , cov
corr PINC PEDU if DECADESON==1980 , cov

* Coefficient parental education on income
est drop _all
forval d=1950(10)1980 {
reghdfe PEDU PINC AGEP AGEP2 if DECADESON==`d' , absorb(year) cluster(son_id)
scalar cov_`d'=_b[PINC]
est store est`d'
}
estout est* using "${path}/Log/IGC_trends_collapse_${date}.scsv", title("Coefficient parental education on income") /// 
	drop(_cons) legend mlabels(,depvars) label cells(b(star fmt(3)) se(par(`"="("'`")""') fmt(3))) append starlevels(* 0.10 ** 0.05 *** 0.01) stats(r2 N N_clust)  

mat ige_cf=J(10,4,.)

local d = 1950

forval c=1/4 {
mat ige_cf[1,`c']=ige_`d'		// True IGE
mat ige_cf[2,`c']=lam_`d'		// Lambda
mat ige_cf[3,`c']=rho_`d'		// Rho
mat ige_cf[4,`c']=cov_`d'		// Covariance
mat ige_cf[5,`c']=lam_`d'*rho_`d'*cov_`d'				// True "Term 2"
mat ige_cf[6,`c']=ige_`d' - lam_`d'*rho_`d'*cov_`d'		// Residual part / "gamma"
mat ige_cf[7,`c']=lam_`d'*rho_`d'*cov_1950				// Counterfactual "Term 2"
mat ige_cf[8,`c']=ige_cf[6,`c']+ige_cf[7,`c']			// Counterfactual IGE
mat ige_cf[9,`c']=n_`d'									// N obs for IGE
mat ige_cf[10,`c']=ni_`d'									// N obs (i) for IGE

local d = `d'+10
}

mat coln ige_cf = 1950s 1960s 1970s 1980s
mat rown ige_cf = ige lambda rho cov term2 res cf_term2 cf_ige N_ige Ni_ige

outtable using "${path}/Log/ige_cf_${date}", mat(ige_cf) format(%9.3f) replace	

cap log close

